


****************************************************************
****************************************************************

* Purpose: build one "dominant" NAICS code per lbdfid-year.
*   1. Collapse lbd_slim to lbdfid x year x bds_vcnaics cells, summing the
*      establishment count, pay and bds_emp.
*   2. For NAICS prefixes of length 2, 3, 4, 5 and 6 in turn, keep only the
*      rows whose prefix matches the top-ranked prefix within lbdfid-year.
*      Ranking: largest total bds_emp, then pay, then establishment count,
*      then seeded random tie-breakers.
*   3. Save the surviving rows as dominant_naics_lbdfid.dta.

cd /projects/data_external/lbd/data
use lbd_slim, clear

drop if missing(lbdfid)

* Each input row counts as one establishment.
gen estabs = 1
collapse (sum) estabs pay bds_emp, by(lbdfid year bds_vcnaics) fast

* NAICS prefixes at the 2- to 6-digit levels (i2 ... i6).
forvalues len = 2/6 {
    gen i`len' = substr(bds_vcnaics, 1, `len')
}

* Seeded random tie-breakers so the result is reproducible run to run.
* Rows with a missing bds_vcnaics get 0 on all three draws, so they sort
* ahead of any row they tie with on bds_emp, pay and estabs.
* NOTE(review): confirm that letting the missing code win ties is intended.
set seed 4535434
foreach tiebreak in r1 r2 r3 {
    gen double `tiebreak' = runiform()
}
foreach tiebreak in r1 r2 r3 {
    replace `tiebreak' = 0 if missing(bds_vcnaics)
}

forvalues len = 2/6 {

    * Firm-year totals within each `len'-digit prefix.
    * total() is the documented egen function; sum() is its deprecated name.
    bysort lbdfid year i`len': egen i`len'_bds_emp = total(bds_emp)
    bysort lbdfid year i`len': egen i`len'_pay     = total(pay)
    bysort lbdfid year i`len': egen i`len'_estabs  = total(estabs)

    * Put the top-ranked prefix first within each lbdfid-year.
    gsort lbdfid year -i`len'_bds_emp -i`len'_pay -i`len'_estabs r1 r2 r3

    * by (not bysort) so the gsort ordering can never be re-sorted away;
    * by errors out if the data are not already sorted on lbdfid year.
    by lbdfid year: drop if i`len' != i`len'[1]

}

* Diagnostics: missing-value summary, and a check of how many lbdfid-year
* cells still hold more than one row.
mdesc
duplicates report lbdfid year

rename bds_vcnaics bds_vcnaics_dom

*keep lbdfid year bds_vcnaics
sort lbdfid year
compress
* Working directory is still the one set at the top of the script.
save dominant_naics_lbdfid, replace

****************************************************************
****************************************************************
